In [1]:
# http://127.0.0.1:8888/?token=d8645dfa36d67d3f2ca03610ce0c3c8e01cc36bd6dfab090
import utils
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
% matplotlib inline
import seaborn as sns
sns.set(style="whitegrid", color_codes=True)
sns.set(font_scale=1.4)
color = sns.color_palette()
from sklearn.decomposition import PCA

读取数据以及数据预览


In [2]:
# Load the raw train / test tables from the paths configured in `utils`,
# then report their (rows, columns) shapes.
train_data = pd.read_csv(utils.file_train_data)
test_data = pd.read_csv(utils.file_test_data)
print(
    'Shape train_data: {}\nShape test_data: {}'.format(
        train_data.shape,
        test_data.shape,
    )
)
Shape train_data: (296104, 93)
Shape test_data: (211216, 90)
In [ ]:
# train_data.head()
In [ ]:
# test_data.head()
In [3]:
# Summary statistics of the training set, transposed so that each original
# column becomes one row of the displayed table.
train_data.describe().T
Out[3]:
count mean std min 25% 50% 75% max
id 296104.0 148051.500000 85478.006392 0.000000 74025.750000 148051.500000 222077.250000 296103.000000
feature0 296104.0 -0.183571 0.543399 -0.909369 -0.592915 -0.300706 0.091880 6.122587
feature1 296104.0 -0.029674 0.711443 -5.476752 -0.448864 -0.043037 0.396922 7.128280
feature2 296104.0 0.125526 0.995027 -1.396379 -0.816543 0.185965 1.016192 2.047605
feature3 296104.0 0.033011 0.691771 -5.901205 -0.382361 0.048724 0.459266 5.032245
feature4 296104.0 -0.203621 0.864182 -7.367753 -0.726691 -0.134526 0.380169 5.668391
feature5 296104.0 0.075024 0.842021 -4.711816 -0.409864 0.194909 0.645611 4.292053
feature6 296104.0 -0.109565 0.735551 -0.975877 -0.651759 -0.300176 0.228569 9.578376
feature7 296104.0 0.031580 1.015734 -1.795687 -0.644248 -0.133470 0.509037 138.195869
feature8 296104.0 -0.062301 0.662345 -5.537757 -0.475774 -0.227029 0.251955 8.512577
feature9 296104.0 -0.029480 0.823770 -1.119937 -0.568208 -0.236239 0.269839 42.915253
feature10 296104.0 0.134743 0.819226 -8.370453 -0.257942 0.315787 0.737641 1.180661
feature11 296104.0 0.042284 0.813013 -5.032668 -0.421695 0.160982 0.582287 5.652936
feature12 296104.0 -0.287851 0.536959 -1.332142 -0.661958 -0.398801 -0.037978 8.632872
feature13 296104.0 0.134281 0.843702 -5.048451 -0.380121 0.270262 0.807097 1.368804
feature14 296104.0 -0.218888 0.511896 -1.182264 -0.579379 -0.325173 0.022550 5.178478
feature15 296104.0 0.021922 0.992827 -1.569579 -0.642727 -0.170519 0.461657 107.235899
feature16 296104.0 -0.021746 1.002926 -1.506111 -0.989577 0.017215 0.885687 1.819687
feature17 296104.0 0.010967 0.844496 -7.726562 -0.469284 0.012253 0.464220 5.755896
feature18 296104.0 -0.205377 0.506246 -0.895607 -0.587022 -0.309486 0.057961 4.554357
feature19 296104.0 0.034886 0.986885 -3.475292 -0.804844 -0.137693 0.529407 3.199119
feature20 296104.0 0.159585 0.864549 -3.502469 -0.369318 0.298128 0.741326 4.903205
feature21 296104.0 -0.003819 0.957927 -1.485827 -0.664361 -0.213995 0.419588 66.662960
feature22 296104.0 0.166899 0.834514 -4.221023 -0.335627 0.275047 0.809949 1.514136
feature23 296104.0 0.032347 0.923867 -1.339318 -0.541172 -0.161272 0.351109 96.059314
feature24 296104.0 0.041682 0.987508 -4.040463 -0.588860 0.119584 0.744837 2.614638
feature25 296104.0 0.034161 0.947729 -1.453175 -0.557741 -0.159344 0.368737 90.015426
feature26 296104.0 -0.172261 0.535363 -1.105237 -0.546066 -0.291148 0.067803 6.390632
feature27 296104.0 0.045064 1.011608 -1.616735 -0.901558 0.219861 0.950826 1.500898
feature28 296104.0 0.001284 1.008552 -8.135105 -0.768816 -0.004023 0.781173 5.209362
feature29 296104.0 -0.117748 0.708018 -3.379562 -0.552009 -0.205651 0.259997 8.643994
feature30 296104.0 -0.046929 0.782376 -3.912258 -0.542473 -0.042278 0.438063 5.213455
feature31 296104.0 0.017039 0.407673 -31.478313 0.013202 0.041997 0.052979 11.230591
feature32 296104.0 0.253549 0.861229 -3.216887 -0.291370 0.387550 0.915721 1.658518
feature33 296104.0 -0.003185 0.413194 -17.828915 -0.073504 -0.000772 0.065970 20.366391
feature34 296104.0 0.014200 0.900434 -3.029178 -0.573636 -0.145004 0.482867 9.166695
feature35 296104.0 0.049763 1.101679 -2.203687 -0.684091 -0.234169 0.445217 8.134445
feature36 296104.0 0.022578 0.728660 -6.310261 -0.417689 0.042060 0.458774 5.427633
feature37 296104.0 0.006329 0.822281 -1.103804 -0.519778 -0.188625 0.292734 47.874702
feature38 296104.0 -0.006527 0.270997 -19.751627 -0.007765 0.011539 0.018472 25.525766
feature39 296104.0 -0.017356 0.997913 -1.433258 -0.983143 -0.033013 0.878081 1.914147
feature40 296104.0 0.047836 0.829973 -9.000676 -0.323742 0.215528 0.574945 4.628407
feature41 296104.0 0.026715 0.996007 -1.819776 -0.651233 -0.133532 0.515966 140.821799
feature42 296104.0 -0.066082 0.719799 -4.258327 -0.516803 -0.069030 0.385464 5.226411
feature43 296104.0 -0.139883 0.561865 -0.862242 -0.559153 -0.271207 0.136709 6.484323
feature44 296104.0 -0.243170 0.576760 -1.023525 -0.674143 -0.360000 0.054720 7.373821
feature45 296104.0 -0.204646 0.734938 -1.103516 -0.746034 -0.390442 0.148137 9.780131
feature46 296104.0 -0.141770 0.938119 -1.950669 -0.860258 -0.315680 0.414789 4.323079
feature47 296104.0 0.012284 0.959821 -1.593069 -0.650549 -0.172049 0.466672 109.988019
feature48 296104.0 -0.053206 0.907773 -1.359467 -0.672880 -0.268793 0.328832 52.505745
feature49 296104.0 -0.010648 0.759059 -8.184500 -0.469499 0.008313 0.478568 5.920332
feature50 296104.0 0.111414 0.830989 -8.433196 -0.243248 0.316010 0.710957 1.082826
feature51 296104.0 0.010937 1.009464 -1.468187 -0.657534 -0.210079 0.420748 64.842088
feature52 296104.0 -0.068484 0.968170 -1.218635 -0.660990 -0.313555 0.237445 55.230215
feature53 296104.0 0.142515 0.827527 -7.239803 -0.278095 0.319408 0.764109 1.231078
feature54 296104.0 -0.111154 0.790744 -1.155600 -0.630590 -0.309698 0.184717 38.472682
feature55 296104.0 0.162471 1.079717 -0.764998 -0.468998 -0.152673 0.403213 39.667184
feature56 296104.0 -0.017154 1.003669 -1.878774 -0.873990 0.295274 0.906233 1.109596
feature57 296104.0 0.067450 0.886473 -3.287221 -0.535967 0.031123 0.614553 6.342691
feature58 296104.0 -0.243379 0.483325 -1.160686 -0.583825 -0.338824 -0.009763 4.867819
feature59 296104.0 -0.015328 0.754445 -2.238767 -0.519141 -0.127774 0.366122 12.295002
feature60 296104.0 0.022843 0.983651 -1.982333 -0.641443 -0.122955 0.520295 140.357198
feature61 296104.0 -0.019805 0.804932 -5.838617 -0.482925 -0.045571 0.447511 7.517588
feature62 296104.0 -0.025714 1.004980 -1.377447 -0.680899 -0.266721 0.352118 51.694204
feature63 296104.0 -0.013411 0.925935 -4.794448 -0.627191 -0.023178 0.593333 4.989511
feature64 296104.0 -0.130281 0.614094 -3.825596 -0.495170 -0.217638 0.199740 5.769770
feature65 296104.0 -0.113602 0.621439 -3.967227 -0.475925 -0.220300 0.213118 6.129736
feature66 296104.0 0.019360 0.822222 -1.143547 -0.504599 -0.164126 0.312332 75.669124
feature67 296104.0 -0.073880 0.874100 -1.162444 -0.639429 -0.306691 0.224548 40.575600
feature68 296104.0 0.025086 1.020661 -1.671938 -0.861086 0.019684 0.901607 1.858950
feature69 296104.0 -0.123372 0.583770 -0.863080 -0.556987 -0.263335 0.163070 6.718955
feature70 296104.0 0.054319 0.813415 -8.515094 -0.334704 0.225381 0.511194 6.171609
feature71 296104.0 -0.043768 0.984257 -1.370808 -0.990262 -0.121752 0.812048 2.077971
feature72 296104.0 0.068552 0.706826 -5.021674 -0.386418 0.072232 0.540337 3.703555
feature73 296104.0 -0.028664 0.999700 -1.564874 -0.992426 0.064016 0.882528 1.590975
feature74 296104.0 -0.143581 0.615558 -1.047975 -0.576125 -0.278765 0.133207 7.960679
feature75 296104.0 0.063107 0.724239 -5.113423 -0.396515 0.070117 0.540182 4.659870
feature76 296104.0 0.046710 0.811647 -5.801529 -0.391368 0.183490 0.556098 6.110127
feature77 296104.0 -0.109873 0.804883 -1.003281 -0.688080 -0.327509 0.234704 10.800532
feature78 296104.0 -0.250631 0.760417 -1.486117 -0.775629 -0.440404 0.071029 10.766136
feature79 296104.0 -0.053381 0.723106 -4.872219 -0.499015 -0.056337 0.394760 6.308445
feature80 296104.0 -0.020858 0.744766 -5.881591 -0.451267 -0.038724 0.415900 6.982908
feature81 296104.0 -0.080579 0.611527 -3.616785 -0.411814 -0.228908 0.223299 7.581303
feature82 296104.0 -0.039366 0.893178 -6.026966 -0.501356 -0.040645 0.464395 8.808456
feature83 296104.0 -0.175156 0.672372 -0.559825 -0.437942 -0.352096 -0.160517 16.267948
feature84 296104.0 0.134338 0.850218 -5.750424 -0.355108 0.296000 0.800862 1.324128
feature85 296104.0 -0.057517 0.720766 -2.365965 -0.537804 -0.163466 0.300624 10.478941
feature86 296104.0 0.047995 0.857993 -8.723863 -0.274470 0.276330 0.635396 1.017918
feature87 296104.0 0.025897 0.999262 -1.965582 -0.637014 -0.123305 0.515613 138.740564
weight 296104.0 3.986947 3.707121 0.000000 1.000000 3.000000 5.000000 63.000000
label 296104.0 0.525653 0.499342 0.000000 0.000000 1.000000 1.000000 1.000000
group 296104.0 13.139036 7.211247 1.000000 7.000000 13.000000 18.000000 28.000000
era 296104.0 10.053626 5.604211 1.000000 5.000000 10.000000 15.000000 20.000000
In [4]:
# Summary statistics of the test set, transposed so that each original
# column becomes one row of the displayed table.
test_data.describe().T
Out[4]:
count mean std min 25% 50% 75% max
id 211216.0 401711.500000 60972.951566 296104.000000 348907.750000 401711.500000 454515.250000 507319.000000
feature0 211216.0 -0.035574 0.947302 -0.908934 -0.580217 -0.285534 0.210596 58.233287
feature1 211216.0 0.010545 0.791918 -7.815074 -0.379420 -0.016631 0.381902 97.027659
feature2 211216.0 -0.327158 0.914016 -1.395470 -1.165517 -0.534441 0.369154 2.020182
feature3 211216.0 -0.017546 0.791231 -79.439025 -0.379854 0.021425 0.381020 6.282057
feature4 211216.0 -0.228328 1.023757 -34.355600 -0.736658 -0.138028 0.336036 10.948480
feature5 211216.0 0.258821 0.882279 -4.467673 -0.175013 0.345001 0.694760 63.565398
feature6 211216.0 -0.145313 0.719110 -0.975656 -0.626224 -0.336481 0.117843 29.182996
feature7 211216.0 -0.008562 0.882051 -1.806166 -0.572957 -0.137408 0.408501 107.215969
feature8 211216.0 -0.066609 0.824557 -11.348923 -0.451018 -0.227702 0.188136 148.906687
feature9 211216.0 0.103592 0.778179 -1.120370 -0.410493 -0.050938 0.436632 65.378774
feature10 211216.0 0.201357 0.820251 -10.641213 -0.157039 0.415127 0.787760 1.180512
feature11 211216.0 0.198868 0.806794 -5.753028 -0.174664 0.299451 0.597068 75.293531
feature12 211216.0 -0.030118 1.096995 -1.291953 -0.656437 -0.239242 0.381455 59.818751
feature13 211216.0 0.262846 0.804347 -5.916139 -0.125253 0.436246 0.842364 1.368025
feature14 211216.0 -0.145963 1.025300 -1.173426 -0.668922 -0.373504 0.083706 86.035656
feature15 211216.0 0.013686 0.891899 -1.569478 -0.552419 -0.137311 0.417173 119.129825
feature16 211216.0 -0.057250 0.985550 -1.506477 -1.002969 -0.034333 0.817833 1.819918
feature17 211216.0 0.006811 0.778326 -20.574851 -0.359896 0.002309 0.379648 9.322136
feature18 211216.0 0.007624 0.905061 -0.895172 -0.540271 -0.221589 0.295491 45.128635
feature19 211216.0 -0.220203 0.951162 -3.475528 -0.805733 -0.138693 0.528293 3.198546
feature20 211216.0 0.277752 0.887638 -3.201328 -0.174042 0.340780 0.723219 52.529545
feature21 211216.0 0.058598 0.950797 -1.502135 -0.523641 -0.103526 0.457878 126.792382
feature22 211216.0 0.226542 0.939487 -4.456527 -0.220175 0.433326 0.914665 1.513351
feature23 211216.0 -0.039776 0.704006 -1.349259 -0.480679 -0.151367 0.273448 97.976811
feature24 211216.0 -0.248824 0.967183 -4.041931 -0.872699 -0.184895 0.434636 2.613188
feature25 211216.0 -0.040655 0.719503 -1.463796 -0.489254 -0.144061 0.288601 96.545593
feature26 211216.0 -0.206097 0.826243 -1.088007 -0.632897 -0.362502 0.045287 109.066151
feature27 211216.0 -0.053192 0.971425 -1.617888 -0.964795 0.075213 0.814259 1.500794
feature28 211216.0 -0.114207 0.924428 -5.045426 -0.805029 -0.125375 0.528350 4.661630
feature29 211216.0 0.178801 1.084632 -3.473556 -0.439650 -0.021979 0.632082 57.542810
feature30 211216.0 0.217539 0.975535 -3.984002 -0.318602 0.145537 0.678795 61.487300
feature31 211216.0 -0.010623 1.018546 -83.281811 -0.003237 0.040781 0.051419 85.149242
feature32 211216.0 0.116013 0.970406 -3.366836 -0.454660 0.270378 0.874945 1.657193
feature33 211216.0 -0.023038 0.681777 -36.181838 -0.131623 -0.011326 0.094456 41.456116
feature34 211216.0 0.244725 1.052457 -2.820720 -0.390311 0.065025 0.706973 51.201414
feature35 211216.0 -0.106873 0.854785 -2.209297 -0.680124 -0.310150 0.239556 8.112092
feature36 211216.0 -0.007496 0.818150 -96.657216 -0.380530 0.010216 0.385501 7.644888
feature37 211216.0 0.008930 0.704218 -1.116082 -0.428067 -0.122379 0.296153 105.109480
feature38 211216.0 0.004836 1.164039 -53.313933 -0.021674 0.010242 0.017703 117.673340
feature39 211216.0 -0.064887 0.975194 -1.433867 -1.006849 -0.094656 0.797940 1.908379
feature40 211216.0 0.094758 0.828905 -14.696965 -0.214415 0.294134 0.568080 73.762115
feature41 211216.0 -0.003702 0.900955 -1.830432 -0.577345 -0.134119 0.420952 109.176199
feature42 211216.0 0.143716 0.905052 -5.366172 -0.321201 0.062982 0.500933 67.851744
feature43 211216.0 -0.107698 0.795153 -0.862022 -0.561738 -0.298726 0.134790 77.508737
feature44 211216.0 0.079151 0.981670 -1.023296 -0.566749 -0.136835 0.504262 42.212418
feature45 211216.0 0.315974 1.177971 -1.103489 -0.504572 0.028657 0.863710 40.931645
feature46 211216.0 -0.116337 0.921182 -1.932083 -0.809499 -0.268972 0.421440 4.377631
feature47 211216.0 0.023313 0.928014 -1.592902 -0.555395 -0.131406 0.433730 122.097385
feature48 211216.0 0.180838 1.048776 -1.360052 -0.449721 -0.010283 0.578254 68.393772
feature49 211216.0 -0.217792 1.033761 -44.291749 -0.725556 -0.143040 0.358875 7.999039
feature50 211216.0 0.182611 0.826795 -17.636041 -0.124599 0.412334 0.753529 1.082867
feature51 211216.0 0.043373 0.888794 -1.484104 -0.523951 -0.112929 0.436943 123.492239
feature52 211216.0 0.331887 1.054959 -1.205910 -0.353507 0.114338 0.748487 71.831239
feature53 211216.0 0.209711 0.808406 -8.479665 -0.170626 0.412380 0.799856 1.231191
feature54 211216.0 0.375789 1.254701 -1.148082 -0.314099 0.142811 0.761433 71.835061
feature55 211216.0 -0.305463 0.626576 -0.764090 -0.636348 -0.494446 -0.215197 34.771202
feature56 211216.0 -0.015526 0.993970 -1.878448 -0.849416 0.291080 0.890822 1.109362
feature57 211216.0 0.285255 0.990479 -2.851415 -0.280401 0.204522 0.764100 53.634951
feature58 211216.0 -0.105780 1.041485 -1.188632 -0.658886 -0.352241 0.161422 74.275392
feature59 211216.0 -0.225592 0.751182 -3.872828 -0.660545 -0.256045 0.137459 20.097119
feature60 211216.0 -0.004753 0.895100 -1.983612 -0.581043 -0.116455 0.441905 107.639949
feature61 211216.0 -0.006970 0.773737 -11.621968 -0.399258 -0.002923 0.379709 16.078309
feature62 211216.0 0.150831 0.932783 -1.378030 -0.467704 -0.028274 0.558308 67.362748
feature63 211216.0 -0.210357 0.986555 -4.489587 -0.882557 -0.200682 0.460489 4.624413
feature64 211216.0 0.081905 1.002116 -4.817014 -0.422141 -0.140051 0.360055 61.773616
feature65 211216.0 0.021659 0.968050 -5.736574 -0.421017 -0.190879 0.274825 71.913080
feature66 211216.0 -0.017109 0.684875 -1.144665 -0.444108 -0.144422 0.273233 92.537119
feature67 211216.0 0.300596 1.010537 -1.157485 -0.364246 0.088976 0.709157 93.132507
feature68 211216.0 0.175897 0.951701 -1.673542 -0.640663 0.239453 0.900356 1.857923
feature69 211216.0 -0.124543 0.746181 -0.862860 -0.565619 -0.300552 0.124617 97.020707
feature70 211216.0 0.107568 0.823407 -9.893484 -0.233558 0.293850 0.513933 100.456318
feature71 211216.0 -0.051433 0.983256 -1.370874 -1.000260 -0.130991 0.818615 2.046258
feature72 211216.0 -0.206526 0.979926 -56.603419 -0.593023 -0.074811 0.330152 5.144567
feature73 211216.0 -0.032628 0.985749 -1.563922 -0.979013 0.072036 0.859956 1.591763
feature74 211216.0 -0.196936 0.856973 -1.048807 -0.658634 -0.376916 0.066613 137.455191
feature75 211216.0 -0.148710 0.975756 -64.498935 -0.508055 -0.024002 0.371208 4.291509
feature76 211216.0 0.155676 0.811259 -6.917250 -0.214867 0.283255 0.566776 88.624071
feature77 211216.0 -0.136185 0.795066 -1.002899 -0.670359 -0.345375 0.141790 10.677954
feature78 211216.0 0.155881 1.178265 -1.480610 -0.591708 -0.107110 0.665038 54.308393
feature79 211216.0 0.086726 0.881713 -6.196844 -0.363839 0.018260 0.450984 78.437895
feature80 211216.0 0.003477 0.820017 -9.740311 -0.379286 -0.004263 0.375958 120.566489
feature81 211216.0 -0.061622 0.787791 -7.373642 -0.382837 -0.229979 0.193930 98.150260
feature82 211216.0 0.055488 0.834493 -5.451441 -0.326486 0.054714 0.472862 7.018018
feature83 211216.0 0.138872 1.177534 -0.551034 -0.297765 -0.118170 0.181575 51.773750
feature84 211216.0 0.247405 0.778076 -6.330712 -0.151864 0.419788 0.829580 1.323349
feature85 211216.0 -0.228182 0.870494 -4.447651 -0.686052 -0.299643 0.107567 30.648646
feature86 211216.0 0.199950 0.823874 -8.713384 -0.069549 0.445678 0.751796 1.067016
feature87 211216.0 -0.007859 0.877902 -1.966866 -0.578599 -0.119059 0.434013 106.477375
group 211216.0 13.228160 7.197659 1.000000 7.000000 14.000000 18.000000 28.000000

数据预处理 train_data to X_train test_data to X_test


  • 去掉无用列
In [5]:
# Keep the group ids as standalone Series next to the feature matrices.
X_train_group = train_data['group']
X_test_group = test_data['group']

# Feature matrices: drop the bookkeeping columns. `group` is not dropped,
# so it remains a column of both X_train and X_test.
X_train = train_data.drop(labels=['id', 'weight', 'era'], axis=1)
X_test = test_data.drop(labels=['id'], axis=1)

# Split the target off the training matrix; pop() removes it from X_train.
y_train = X_train.pop('label')
  • One hot encoding
In [ ]:
# group_train_dummies = pd.get_dummies(X_train['group'],prefix='group')
# group_test_dummies = pd.get_dummies(X_test['group'],prefix='group')
In [ ]:
# X_train.drop(['group'],axis=1,inplace=True)
# X_test.drop(['group'],axis=1,inplace=True)
# X_train=X_train.join(group_train_dummies)
# X_test=X_test.join(group_test_dummies)

label列和group列以及weight列统计


In [6]:
# Number of training rows per value of the target column `label`.
train_data['label'].value_counts()
Out[6]:
1.0    155648
0.0    140456
Name: label, dtype: int64
In [7]:
# Class balance of the target as fractions of all training rows.
# The column mean equals sum(label) / n_rows, and the mean of the boolean mask
# equals the share of rows where label == 0. Both are vectorised, avoiding a
# Python-level sum() over the Series and a filtered copy of the whole frame.
# (Assumes `label` has no missing values, as the describe() count indicates.)
print('label 1 ratio: {}\n'.format(train_data['label'].mean()))
print('label 0 ratio: {}\n'.format((train_data['label'] == 0).mean()))
label 1 ratio: 0.525653148893632

label 0 ratio: 0.47434685110636804

In [8]:
# Bar chart of how many training rows carry each `label` value.
sns.set(font_scale=1.8)
int_level = train_data['label'].value_counts()
plt.figure(figsize=(20,5))
# x / y are passed by keyword: newer seaborn releases no longer accept the
# data vectors as positional arguments.
sns.barplot(x=int_level.index, y=int_level.values, alpha=0.8, color='b')
plt.title('Counts of label in train_data')
plt.show()
In [9]:
# Number of training rows per `group` id.
train_data['group'].value_counts()
Out[9]:
6     26720
15    23939
7     21574
13    16973
18    16026
17    16018
21    14186
4     13534
16    11945
1     10805
14    10686
8     10643
5      9593
3      9315
22     8852
12     8117
19     7825
28     7722
23     7548
11     7499
24     6499
10     5696
27     5194
20     5079
9      4430
25     4134
2      3392
26     2160
Name: group, dtype: int64
In [10]:
# Number of test rows per `group` id.
test_data['group'].value_counts()
Out[10]:
15    19098
6     18720
7     17029
21    11498
17    11478
18    11405
13     9688
4      9623
16     9207
1      7200
14     7149
12     6898
3      6869
22     6844
8      6690
5      6222
28     6104
19     5490
23     4789
11     4674
24     4217
10     4051
27     3691
20     3283
9      3182
25     2277
2      2196
26     1644
Name: group, dtype: int64
In [11]:
# Bar chart of how many training rows fall into each `group`.
sns.set(font_scale=1.8)
int_level = train_data['group'].value_counts()
plt.figure(figsize=(20,5))
# x / y are passed by keyword: newer seaborn releases no longer accept the
# data vectors as positional arguments.
sns.barplot(x=int_level.index, y=int_level.values, alpha=0.8, color='b')
plt.title('Counts of group in train_data')
plt.show()
In [12]:
# Bar chart of how many test rows fall into each `group`.
sns.set(font_scale=1.8)
int_level = test_data['group'].value_counts()
plt.figure(figsize=(20,5))
# x / y are passed by keyword: newer seaborn releases no longer accept the
# data vectors as positional arguments.
sns.barplot(x=int_level.index, y=int_level.values, alpha=0.8, color='r')
plt.title('Counts of group in test_data')
plt.show()
In [13]:
# Number of training rows per value of the `weight` column.
train_data['weight'].value_counts()
Out[13]:
1.0     51625
2.0     46494
3.0     39878
4.0     32546
0.0     26671
5.0     25191
6.0     19463
7.0     14307
8.0     10432
9.0      7774
10.0     5570
11.0     4075
12.0     2961
13.0     2215
14.0     1664
15.0     1170
16.0      870
17.0      657
18.0      486
19.0      393
20.0      280
21.0      239
22.0      167
23.0      144
24.0      117
26.0      103
25.0      101
27.0       82
28.0       49
30.0       43
29.0       39
31.0       35
32.0       35
33.0       34
35.0       28
34.0       24
37.0       21
38.0       15
40.0       13
39.0       12
36.0       11
43.0        8
41.0        7
47.0        7
42.0        6
44.0        6
45.0        5
49.0        4
46.0        3
48.0        3
52.0        3
51.0        3
53.0        3
60.0        2
56.0        2
57.0        2
61.0        2
50.0        1
55.0        1
54.0        1
63.0        1
Name: weight, dtype: int64
In [14]:
# Bar chart of how many training rows carry each `weight` value.
sns.set(font_scale=2.2)
int_level = train_data['weight'].value_counts()
plt.figure(figsize=(50,20))
# x / y are passed by keyword: newer seaborn releases no longer accept the
# data vectors as positional arguments.
sns.barplot(x=int_level.index, y=int_level.values, alpha=0.8, color='b')
plt.title('Counts of weight in train_data')
plt.show()

Feature Correlation 以及 Importance


In [15]:
# Drop the bookkeeping columns before the correlation analysis.
# errors='ignore' keeps this cell re-runnable: `train_data` is rebound to the
# result, so a second execution would otherwise raise KeyError because the
# columns are already gone.
train_data = train_data.drop(['id','weight','era'], axis=1, errors='ignore')
In [16]:
# Pairwise correlation matrix over the columns of train_data
# (DataFrame.corr uses Pearson correlation unless told otherwise).
corrmat = train_data.corr()
# Heatmap of the full matrix, left disabled by the author:
#f, ax = plt.subplots(figsize=(500, 500))
#sns.heatmap(corrmat, vmax=.8, square=True);
In [17]:
# Annotated heatmap of the k columns with the largest correlation to `label`
# (`label` itself is among them, since its self-correlation is 1).
k = 15  # number of variables shown in the heatmap
plt.figure(figsize=(20, 15))
cols = corrmat.nlargest(k, 'label').index
# rowvar=False treats every column as one variable, so no transpose is needed.
cm = np.corrcoef(train_data[cols].values, rowvar=False)
sns.set(font_scale=1.75)
hm = sns.heatmap(
    cm,
    cbar=True,
    annot=True,
    square=True,
    fmt='.4f',
    annot_kws={'size': 15},
    yticklabels=cols.values,
    xticklabels=cols.values,
)
plt.show()
In [18]:
# Rank the numeric columns by their correlation with `label`, strongest
# positive correlation first.
numeric_features = train_data.select_dtypes(include=[np.number])
# (A bare `numeric_features.dtypes` statement used to sit here; its value was
# neither displayed nor used, so it was removed.)
corr = numeric_features.corr()
print("%d features in total" %(len(corr)))
# At most the first 90 entries are printed; the recorded run had exactly 90.
print (corr['label'].sort_values(ascending=False)[:90], '\n')
#print (corr['label'].sort_values(ascending=False)[-5:])
90 features in total
label        1.000000
feature65    0.096309
feature79    0.092043
feature8     0.086638
feature80    0.085634
feature64    0.083743
feature61    0.083401
feature1     0.082987
feature81    0.081627
feature76    0.081003
feature42    0.080956
feature43    0.078274
feature0     0.077506
feature40    0.076583
feature18    0.073613
feature70    0.073127
feature11    0.068062
feature6     0.065869
feature30    0.064371
feature5     0.062039
feature69    0.061564
feature29    0.061475
feature44    0.055733
feature53    0.052928
feature13    0.051797
feature84    0.051731
feature50    0.050753
feature22    0.047678
feature10    0.044225
feature67    0.041659
feature9     0.039655
feature37    0.038421
feature54    0.038160
feature48    0.037919
feature62    0.037805
feature52    0.036996
feature20    0.035856
feature77    0.035788
feature51    0.033471
feature21    0.033105
feature66    0.031083
feature15    0.030819
feature47    0.030375
feature86    0.028396
feature58    0.028250
feature68    0.027396
feature7     0.027392
feature41    0.027253
feature82    0.026371
feature60    0.025826
feature57    0.025771
feature87    0.025723
feature14    0.025694
feature32    0.025156
feature25    0.025058
feature4     0.024202
feature23    0.023922
feature55    0.022005
feature26    0.016959
feature12    0.015246
feature39    0.015175
feature34    0.014777
feature45    0.013614
feature74    0.012857
feature71    0.007865
feature16    0.001705
group       -0.002730
feature46   -0.003047
feature27   -0.003991
feature38   -0.004804
feature85   -0.007902
feature78   -0.011094
feature35   -0.011132
feature73   -0.011343
feature56   -0.011838
feature31   -0.012746
feature2    -0.016984
feature49   -0.017033
feature83   -0.017191
feature63   -0.026447
feature24   -0.029036
feature19   -0.029326
feature33   -0.031288
feature72   -0.034152
feature59   -0.042663
feature28   -0.056204
feature75   -0.062988
feature17   -0.063448
feature3    -0.065088
feature36   -0.072447
Name: label, dtype: float64 

查看Feature分布


In [46]:
# Histogram (step outline) plus KDE curve of feature0 in the training matrix.
sns.set(font_scale=1.8)
plt.figure(figsize=(20, 10))
sns.distplot(
    X_train['feature0'],
    bins=100,
    kde_kws={'lw': 2.5, 'color': 'k', 'alpha': 0.7, 'label': 'KDE'},
    hist_kws={
        'histtype': 'step',
        'lw': 3,
        'color': 'b',
        'alpha': 0.9,
        'label': 'feature0',
    },
)
plt.title('Distribution of Feature0 in X_train')
plt.show()
In [47]:
# Histogram (step outline) plus KDE curve of feature0 in the test matrix.
sns.set(font_scale=1.8)
plt.figure(figsize=(20, 10))
sns.distplot(
    X_test['feature0'],
    bins=100,
    kde_kws={'lw': 2.5, 'color': 'k', 'alpha': 0.7, 'label': 'KDE'},
    hist_kws={
        'histtype': 'step',
        'lw': 3,
        'color': 'r',
        'alpha': 0.9,
        'label': 'feature0',
    },
)
# Title added so the figure can be identified on its own, matching the
# X_train version of this plot.
plt.title('Distribution of Feature0 in X_test')
plt.show()
In [49]:
# Overlay the KDE curves of feature0 for train (blue) and test (red) in one
# figure, with the x-axis restricted to [-2, 5].
sns.set(font_scale=1.8)
plt.figure(figsize=(20, 10))
ax1 = sns.distplot(
    X_train['feature0'],
    bins=100,
    kde=True,
    color='b',
    kde_kws={'lw': 3, 'label': 'X_train'},
    hist=False,
)
ax2 = sns.distplot(
    X_test['feature0'],
    bins=100,
    kde=True,
    color='r',
    kde_kws={'lw': 3, 'label': 'X_test'},
    hist=False,
)
plt.title('Distribution of Feature0 in X_train and X_test')
ax1.set_xlim(-2, 5)
ax2.set_xlim(-2, 5)
plt.show()
In [22]:
# feature0 plotted against row position in the training matrix.
sns.set(font_scale=1.8)
plt.figure(figsize=(20, 10))
# Both axes now come from X_train, so their lengths always match
# (the y-values used to be read from train_data).
plt.scatter(range(X_train.shape[0]), X_train["feature0"].values, color='m')
plt.title('Scatter distribution of Feature0 in X_train')
plt.show()
In [23]:
# feature0 plotted against row position in the test matrix.
sns.set(font_scale=1.8)
plt.figure(figsize=(20, 10))
# Both axes now come from X_test, so their lengths always match
# (the y-values used to be read from test_data).
plt.scatter(range(X_test.shape[0]), X_test["feature0"].values, color='g')
plt.title('Scatter distribution of Feature0 in X_test')
plt.show()
  • 训练集和测试集feature0-87以及group分布对比
In [50]:
def plot_train_test_violin(col_idx, train, test, ylim=(-3, 3)):
    """Draw side-by-side violin plots of one column for train and test.

    Parameters
    ----------
    col_idx : int
        Positional index of the column to plot. It is looked up separately in
        ``train.columns`` and ``test.columns``.
    train, test : pandas.DataFrame
        Frames shown in the left and right panel respectively.
    ylim : tuple of (float, float), optional
        y-axis limits applied to both panels, so the two distributions are
        compared on the same scale.
    """
    sns.set(font_scale=1.4)
    fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
    bx = sns.violinplot(y=train.columns[col_idx], data=train, ax=axes[0])
    cx = sns.violinplot(y=test.columns[col_idx], data=test, ax=axes[1])
    bx.set_ylim(ylim)
    cx.set_ylim(ylim)
    # Panel titles make it clear which data set each violin belongs to.
    bx.set_title('X_train')
    cx.set_title('X_test')
    plt.show()
    # Release the figure explicitly; many figures are created in a row.
    plt.close(fg)


# One figure per column position 0..32. This replaces 33 copy-pasted cells
# that differed only in the column index.
for col_idx in range(33):
    plot_train_test_violin(col_idx, X_train, X_test)
In [83]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[33], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[33], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [84]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[34], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[34], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [85]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[35], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[35], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [86]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[36], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[36], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [87]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[37], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[37], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [88]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[38], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[38], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [89]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[39], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[39], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [90]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[40], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[40], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [91]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[41], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[41], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [92]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[42], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[42], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [93]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[43], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[43], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [94]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[44], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[44], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [95]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[45], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[45], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [96]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[46], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[46], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [97]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[47], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[47], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [98]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[48], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[48], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [99]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[49], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[49], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [100]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[50], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[50], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [101]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[51], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[51], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [102]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[52], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[52], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [103]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[53], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[53], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [104]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[54], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[54], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [105]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[55], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[55], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [106]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[56], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[56], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [107]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[57], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[57], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [108]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[58], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[58], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [109]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[59], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[59], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [110]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[60], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[60], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [111]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[61], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[61], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [112]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[62], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[62], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [113]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[63], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[63], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [114]:
sns.set(font_scale=1.4)
fg,axes = plt.subplots(1,2,figsize=(20, 5),sharex=True)
bx = sns.violinplot(y=X_train.columns[64], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[64], data=X_test, ax=axes[1])
bx.set_ylim([-3,3])
cx.set_ylim([-3,3])
plt.show()
In [97]:
# Compare train vs. test distributions for the column at position 65.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train.columns[65], data=X_train, ax=axes[0])
cx = sns.violinplot(y=X_test.columns[65], data=X_test, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly, like the neighbouring cells, so the cell does not echo
# the (-3, 3) tuple returned by the last set_ylim call.
plt.show()
Out[97]:
(-3, 3)
In [115]:
# Compare train vs. test distributions for the columns at positions 66-87.
# One figure per column position: train on the left axis, test on the right.
# This replaces 22 copy-pasted cells that differed only in the column index.
sns.set(font_scale=1.4)
for col_idx in range(66, 88):
    fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
    bx = sns.violinplot(y=X_train.columns[col_idx], data=X_train, ax=axes[0])
    cx = sns.violinplot(y=X_test.columns[col_idx], data=X_test, ax=axes[1])
    # Same y-range on both panels so the two shapes can be compared directly.
    bx.set_ylim([-3, 3])
    cx.set_ylim([-3, 3])
    plt.show()
In [137]:
# Distribution of the `group` values in train (left) and test (right).
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
train_ax, test_ax = axes
bx = sns.violinplot(y=train_data['group'], data=X_train, ax=train_ax)
cx = sns.violinplot(y=test_data['group'], data=X_test, ax=test_ax)
# Both panels share the same fixed y-range.
for group_ax in (bx, cx):
    group_ax.set_ylim([-1, 28])
plt.show()

按group列分割表


  • group=1
In [ ]:
# Rows of the train/test frames whose `group` equals 1.
X_train_1 = X_train.loc[X_train['group'] == 1]
X_test_1 = X_test.loc[X_test['group'] == 1]
In [ ]:
# Compare the columns at positions 0-3 between the group-1 train and test
# subsets, one figure per column position (train left, test right).
# This replaces four copy-pasted cells that differed only in the column index.
sns.set(font_scale=1.4)
for col_idx in range(4):
    fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
    bx = sns.violinplot(y=X_train_1.columns[col_idx], data=X_train_1, ax=axes[0])
    cx = sns.violinplot(y=X_test_1.columns[col_idx], data=X_test_1, ax=axes[1])
    bx.set_ylim([-3, 3])
    cx.set_ylim([-3, 3])
    # Render explicitly so the figure is shown instead of the (-3, 3) tuple
    # returned by set_ylim.
    plt.show()
  • group=2
In [ ]:
# Rows of the train/test frames whose `group` equals 2.
X_train_2 = X_train.loc[X_train['group'] == 2]
X_test_2 = X_test.loc[X_test['group'] == 2]
In [ ]:
# Compare the column at position 3 between the group-2 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_2.columns[3], data=X_train_2, ax=axes[0])
cx = sns.violinplot(y=X_test_2.columns[3], data=X_test_2, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=3
In [ ]:
# Rows of the train/test frames whose `group` equals 3.
X_train_3 = X_train.loc[X_train['group'] == 3]
X_test_3 = X_test.loc[X_test['group'] == 3]
In [ ]:
# Compare the column at position 3 between the group-3 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_3.columns[3], data=X_train_3, ax=axes[0])
cx = sns.violinplot(y=X_test_3.columns[3], data=X_test_3, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=4
In [ ]:
# Rows of the train/test frames whose `group` equals 4.
X_train_4 = X_train.loc[X_train['group'] == 4]
X_test_4 = X_test.loc[X_test['group'] == 4]
In [ ]:
# Compare the column at position 3 between the group-4 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_4.columns[3], data=X_train_4, ax=axes[0])
cx = sns.violinplot(y=X_test_4.columns[3], data=X_test_4, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=5
In [ ]:
# Rows of the train/test frames whose `group` equals 5.
X_train_5 = X_train.loc[X_train['group'] == 5]
X_test_5 = X_test.loc[X_test['group'] == 5]
In [ ]:
# Compare the column at position 3 between the group-5 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_5.columns[3], data=X_train_5, ax=axes[0])
cx = sns.violinplot(y=X_test_5.columns[3], data=X_test_5, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=6
In [ ]:
# Rows of the train/test frames whose `group` equals 6.
X_train_6 = X_train.loc[X_train['group'] == 6]
X_test_6 = X_test.loc[X_test['group'] == 6]
In [ ]:
# Compare the column at position 3 between the group-6 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_6.columns[3], data=X_train_6, ax=axes[0])
cx = sns.violinplot(y=X_test_6.columns[3], data=X_test_6, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=7
In [ ]:
# Rows of the train/test frames whose `group` equals 7.
X_train_7 = X_train.loc[X_train['group'] == 7]
X_test_7 = X_test.loc[X_test['group'] == 7]
In [ ]:
# Compare the column at position 3 between the group-7 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_7.columns[3], data=X_train_7, ax=axes[0])
cx = sns.violinplot(y=X_test_7.columns[3], data=X_test_7, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=8
In [ ]:
# Rows of the train/test frames whose `group` equals 8.
X_train_8 = X_train.loc[X_train['group'] == 8]
X_test_8 = X_test.loc[X_test['group'] == 8]
In [ ]:
# Compare the column at position 3 between the group-8 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_8.columns[3], data=X_train_8, ax=axes[0])
cx = sns.violinplot(y=X_test_8.columns[3], data=X_test_8, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=9
In [ ]:
# Rows of the train/test frames whose `group` equals 9.
X_train_9 = X_train.loc[X_train['group'] == 9]
X_test_9 = X_test.loc[X_test['group'] == 9]
In [ ]:
# Compare the column at position 3 between the group-9 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_9.columns[3], data=X_train_9, ax=axes[0])
cx = sns.violinplot(y=X_test_9.columns[3], data=X_test_9, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=10
In [ ]:
# Rows of the train/test frames whose `group` equals 10.
X_train_10 = X_train.loc[X_train['group'] == 10]
X_test_10 = X_test.loc[X_test['group'] == 10]
In [ ]:
# Compare the column at position 3 between the group-10 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_10.columns[3], data=X_train_10, ax=axes[0])
cx = sns.violinplot(y=X_test_10.columns[3], data=X_test_10, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=11
In [ ]:
# Rows of the train/test frames whose `group` equals 11.
X_train_11 = X_train.loc[X_train['group'] == 11]
X_test_11 = X_test.loc[X_test['group'] == 11]
In [ ]:
# Compare the column at position 3 between the group-11 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_11.columns[3], data=X_train_11, ax=axes[0])
cx = sns.violinplot(y=X_test_11.columns[3], data=X_test_11, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=12
In [ ]:
# Rows of the train/test frames whose `group` equals 12.
X_train_12 = X_train.loc[X_train['group'] == 12]
X_test_12 = X_test.loc[X_test['group'] == 12]
In [ ]:
# Compare the column at position 3 between the group-12 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_12.columns[3], data=X_train_12, ax=axes[0])
cx = sns.violinplot(y=X_test_12.columns[3], data=X_test_12, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=13
In [ ]:
# Rows of the train/test frames whose `group` equals 13.
X_train_13 = X_train.loc[X_train['group'] == 13]
X_test_13 = X_test.loc[X_test['group'] == 13]
In [ ]:
# Compare the column at position 3 between the group-13 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_13.columns[3], data=X_train_13, ax=axes[0])
cx = sns.violinplot(y=X_test_13.columns[3], data=X_test_13, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=14
In [ ]:
# Rows of the train/test frames whose `group` equals 14.
X_train_14 = X_train.loc[X_train['group'] == 14]
X_test_14 = X_test.loc[X_test['group'] == 14]
In [ ]:
# Compare the column at position 3 between the group-14 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_14.columns[3], data=X_train_14, ax=axes[0])
cx = sns.violinplot(y=X_test_14.columns[3], data=X_test_14, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=15
In [ ]:
# Rows of the train/test frames whose `group` equals 15.
X_train_15 = X_train.loc[X_train['group'] == 15]
X_test_15 = X_test.loc[X_test['group'] == 15]
In [ ]:
# Compare the column at position 3 between the group-15 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_15.columns[3], data=X_train_15, ax=axes[0])
cx = sns.violinplot(y=X_test_15.columns[3], data=X_test_15, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=16
In [ ]:
# Rows of the train/test frames whose `group` equals 16.
X_train_16 = X_train.loc[X_train['group'] == 16]
X_test_16 = X_test.loc[X_test['group'] == 16]
In [ ]:
# Compare the column at position 3 between the group-16 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_16.columns[3], data=X_train_16, ax=axes[0])
cx = sns.violinplot(y=X_test_16.columns[3], data=X_test_16, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=17
In [ ]:
# Rows of the train/test frames whose `group` equals 17.
X_train_17 = X_train.loc[X_train['group'] == 17]
X_test_17 = X_test.loc[X_test['group'] == 17]
In [ ]:
# Compare the column at position 3 between the group-17 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_17.columns[3], data=X_train_17, ax=axes[0])
cx = sns.violinplot(y=X_test_17.columns[3], data=X_test_17, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=18
In [ ]:
# Rows of the train/test frames whose `group` equals 18.
X_train_18 = X_train.loc[X_train['group'] == 18]
X_test_18 = X_test.loc[X_test['group'] == 18]
In [ ]:
# Compare the column at position 3 between the group-18 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_18.columns[3], data=X_train_18, ax=axes[0])
cx = sns.violinplot(y=X_test_18.columns[3], data=X_test_18, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=19
In [ ]:
# Rows of the train/test frames whose `group` equals 19.
X_train_19 = X_train.loc[X_train['group'] == 19]
X_test_19 = X_test.loc[X_test['group'] == 19]
In [ ]:
# Compare the column at position 3 between the group-19 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_19.columns[3], data=X_train_19, ax=axes[0])
cx = sns.violinplot(y=X_test_19.columns[3], data=X_test_19, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=20
In [ ]:
# Rows of the train/test frames whose `group` equals 20.
X_train_20 = X_train.loc[X_train['group'] == 20]
X_test_20 = X_test.loc[X_test['group'] == 20]
In [ ]:
# Compare the column at position 3 between the group-20 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_20.columns[3], data=X_train_20, ax=axes[0])
cx = sns.violinplot(y=X_test_20.columns[3], data=X_test_20, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=21
In [ ]:
# Rows of the train/test frames whose `group` equals 21.
X_train_21 = X_train.loc[X_train['group'] == 21]
X_test_21 = X_test.loc[X_test['group'] == 21]
In [ ]:
# Compare the column at position 3 between the group-21 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_21.columns[3], data=X_train_21, ax=axes[0])
cx = sns.violinplot(y=X_test_21.columns[3], data=X_test_21, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=22
In [ ]:
# Rows of the train/test frames whose `group` equals 22.
X_train_22 = X_train.loc[X_train['group'] == 22]
X_test_22 = X_test.loc[X_test['group'] == 22]
In [ ]:
# Compare the column at position 3 between the group-22 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_22.columns[3], data=X_train_22, ax=axes[0])
cx = sns.violinplot(y=X_test_22.columns[3], data=X_test_22, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=23
In [ ]:
# Rows of the train/test frames whose `group` equals 23.
X_train_23 = X_train.loc[X_train['group'] == 23]
X_test_23 = X_test.loc[X_test['group'] == 23]
In [ ]:
# Compare the column at position 3 between the group-23 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_23.columns[3], data=X_train_23, ax=axes[0])
cx = sns.violinplot(y=X_test_23.columns[3], data=X_test_23, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=24
In [ ]:
# Rows of the train/test frames whose `group` equals 24.
X_train_24 = X_train.loc[X_train['group'] == 24]
X_test_24 = X_test.loc[X_test['group'] == 24]
In [ ]:
# Compare the column at position 3 between the group-24 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_24.columns[3], data=X_train_24, ax=axes[0])
cx = sns.violinplot(y=X_test_24.columns[3], data=X_test_24, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=25
In [ ]:
# Rows of the train/test frames whose `group` equals 25.
X_train_25 = X_train.loc[X_train['group'] == 25]
X_test_25 = X_test.loc[X_test['group'] == 25]
In [ ]:
# Compare the column at position 3 between the group-25 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_25.columns[3], data=X_train_25, ax=axes[0])
cx = sns.violinplot(y=X_test_25.columns[3], data=X_test_25, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=26
In [ ]:
# Rows of the train/test frames whose `group` equals 26.
X_train_26 = X_train.loc[X_train['group'] == 26]
X_test_26 = X_test.loc[X_test['group'] == 26]
In [ ]:
# Compare the column at position 3 between the group-26 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_26.columns[3], data=X_train_26, ax=axes[0])
cx = sns.violinplot(y=X_test_26.columns[3], data=X_test_26, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=27
In [ ]:
# Rows of the train/test frames whose `group` equals 27.
X_train_27 = X_train.loc[X_train['group'] == 27]
X_test_27 = X_test.loc[X_test['group'] == 27]
In [ ]:
# Compare the column at position 3 between the group-27 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_27.columns[3], data=X_train_27, ax=axes[0])
cx = sns.violinplot(y=X_test_27.columns[3], data=X_test_27, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()
  • group=28
In [ ]:
# Select the group-28 rows here: no earlier cell defines X_train_28 / X_test_28,
# and this cell previously re-plotted the group-26 subsets by mistake.
X_train_28 = X_train[X_train['group'] == 28]
X_test_28 = X_test[X_test['group'] == 28]

# Compare the column at position 3 between the group-28 train and test subsets.
sns.set(font_scale=1.4)
fg, axes = plt.subplots(1, 2, figsize=(20, 5), sharex=True)
bx = sns.violinplot(y=X_train_28.columns[3], data=X_train_28, ax=axes[0])
cx = sns.violinplot(y=X_test_28.columns[3], data=X_test_28, ax=axes[1])
bx.set_ylim([-3, 3])
cx.set_ylim([-3, 3])
# Render explicitly so the cell does not echo the (-3, 3) returned by set_ylim.
plt.show()

train_data.info()

In [ ]:
# Disabled check: per-column count of missing values in train_data.
#train_data.isnull().sum()
In [ ]:
# Disabled check: tabulate train_data's column dtypes and count how many
# columns share each dtype.
#table_type = train_data.dtypes.reset_index()
#table_type.columns=['feat','type']
#print(table_type.groupby('type').aggregate('count'))
In [ ]:
# Disabled plot: count plot of train_data.label with the bars ordered '0', '1'.
#sns.countplot(train_data.label,order=['0', '1']);
#plt.xlabel('label');
#plt.ylabel('Counts');

PCA处理

  • 对[group]列28个虚拟变量做PCA处理
In [ ]:
# PCA keeping 28 components, matching the 28 group dummy variables named in
# the section heading above.
pca = PCA(n_components=28)
In [ ]:
# Fit the PCA on group_train_dummie, which is not defined in the visible part
# of this notebook (presumably the one-hot encoded `group` column - TODO confirm).
pca.fit(group_train_dummie)
In [ ]:
# Report the fitted PCA: explained-variance ratio per component, explained
# variance per component, then the number of components.
for fitted_attr in ('explained_variance_ratio_', 'explained_variance_', 'n_components_'):
    print(getattr(pca, fitted_attr))
  • 对feature0-feature88做PCA处理
In [ ]:
# PCA keeping 88 components; it is fitted on X_train in the next cell.
pca = PCA(n_components=88)
In [ ]:
# Fit the PCA on X_train. The projected array is not assigned to a name, so it
# is only available as this cell's displayed output.
pca.fit_transform(X_train)
In [ ]:
# Report the fitted PCA: explained-variance ratio per component, explained
# variance per component, then the number of components.
for fitted_attr in ('explained_variance_ratio_', 'explained_variance_', 'n_components_'):
    print(getattr(pca, fitted_attr))
In [ ]:
pca = PCA(n_components=88)